__author__ = 'chobit'
# -*- coding:utf-8 -*-
import os
import time
import urllib
import urllib.request

import pymongo
import requests
from bs4 import BeautifulSoup
class Jdly:
	"""Scraper for jdlingyu.moe: collects gallery links and image URLs,
	persists them in a local MongoDB, and downloads the images to ./img.

	Collections used (database ``jdly``):
	  * ``page``      -- paginated index URLs built by structurePage()
	  * ``page_list`` -- per-gallery links scraped by getIndexPage()
	  * ``img_list``  -- {'title', 'img_url_info'} docs from getImgLinks()
	"""

	def __init__(self, url):
		"""Connect to MongoDB on localhost:27017 and prepare request headers.

		url -- site root, e.g. 'http://www.jdlingyu.moe/'; must end with '/'
		       because structurePage() appends 'page/{n}/' directly.
		"""
		self.table = None
		self.url = url
		client = pymongo.MongoClient('localhost', 27017)
		self.jdly_db = client['jdly']
		# Ordinary desktop-browser UA so the site serves the normal HTML.
		self.header = {
			'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36',
			'Connection': 'keep-alive',
		}

	def getIndexPage(self, url):
		"""Scrape one index page and store each gallery link in 'page_list'.

		url -- an index-page URL (one of the docs built by structurePage()).
		"""
		response = requests.get(url, headers=self.header)
		soup = BeautifulSoup(response.text, 'lxml')
		for link in soup.select('a.imageLink'):
			link_url = link.get('href')
			if not link_url:
				# Anchor without an href would store a useless None doc.
				continue
			self.jdly_db['page_list'].insert_one({'url': link_url})
			print(link_url, '写入成功')

	def getImgLinks(self, url):
		"""Scrape one gallery page: store its title and image URLs in 'img_list'.

		url -- a gallery URL previously saved in 'page_list'.
		"""
		response = requests.get(url, headers=self.header)
		soup = BeautifulSoup(response.text, 'lxml')
		titles = soup.select('h2.main-title')
		if not titles:
			# Layout changed or the request was blocked: skip instead of
			# crashing with IndexError on [0].
			return
		data = {
			'title': titles[0].text,
			'img_url_info': [a.get('href') for a in soup.select('div.main-body > p > a')],
		}
		self.jdly_db['img_list'].insert_one(data)
		print(data, '写入成功')

	def structurePage(self, Num):
		"""Build index-page URLs for pages 1..Num and store them in 'page'.

		Num -- total number of index pages to enumerate.
		"""
		for i in range(1, Num + 1):
			page_url = self.url + 'page/{}/'.format(i)
			self.jdly_db['page'].insert_one({'page_url': page_url})
			print(page_url)

	def downImgList(self, filename, url):
		"""Download one image to ./img/<filename>, then sleep 2s to rate-limit.

		The bytes are fetched *before* the target file is opened, so a failed
		request no longer leaves an empty file behind; 'with' guarantees the
		handle is closed even if the write fails.
		"""
		img_data = urllib.request.urlopen(url).read()
		with open('./img/' + filename, 'wb') as f:
			f.write(img_data)
		time.sleep(2)

	def DownImg(self):
		"""Download every image recorded in 'img_list'.

		Skips files already present in ./img and sinaimg-hosted images
		(that host's anti-scraping measures are not handled yet).
		"""
		# Snapshot the directory once: O(1) membership tests instead of
		# re-reading the directory for every single image.
		downloaded = set(os.listdir('./img'))
		for album in self.jdly_db['img_list'].find():
			for img_url in album['img_url_info']:
				if 'sinaimg' in img_url:
					print('还无法支持下载sinaimg服务器反爬虫机制')
					continue
				filename = img_url.split('/')[-1]
				if filename == '' or filename in downloaded:
					print(img_url, '已下载......')
				else:
					self.downImgList(filename, img_url)
					downloaded.add(filename)  # keep the snapshot current
					print(img_url, '下载完成.....')
			print(album['title'], '图集下载完成')

	def Jdly_init(self):
		"""Walk every stored gallery link and scrape its image URLs."""
		for doc in self.jdly_db['page_list'].find():
			self.getImgLinks(doc['url'])